* Folder that holds the input data sets. Edit this to the location where
* nyt8081.dta and mini_data.dta (both read from `path' in this do-file) are saved.
local path "V:\docs\sc_ops\budziak_lempert_jlc_replication"

* Load the NYT data, replacing whatever is currently in memory, and order it by id.
use "`path'\nyt8081.dta", clear
sort id

*outline:
*draw a sample of i=1,2,...,100 opinions; opinion i contains w_osi words
*store the 100 fkgl values for these opinions
*shuffle the reports
*get 100 fkgl values for report chunks of length w_rs1,...,w_rsi,...,w_rs100,
*where w_rsi=w_osi (chunk i has as many words as opinion i)
*store these values
*repeat for 100 samples/simulations

* This version is written without frames: both data sets are stacked in memory.
* Fix the random-number seed so the shuffles are reproducible (6921 is a date).
set seed 6921

* Stack mini_data beneath the NYT data already in memory.
append using "`path'\mini_data.dta"

* Keep a row only if at least one of the two readability scores is present
* (this removes the nyt reports that have no fkgl score).
keep if __fkgl != . | fkgl != .

*note that mini data vars except caseid start with double underscores.


* Simulation: for each of 100 draws, shuffle opinions (rows with id==.) and
* reports (rows with id!=.) separately, keep the fkgl of the first 100
* opinions in ofk_`s', and build 100 report excerpts whose word counts match
* those opinions. The word-weighted fkgl of excerpt i is stored in row i of
* rfk_`s'.
*
* After "sort ro rr" every row with a non-missing ro (i.e. id==.) precedes
* every report row, because missing values sort last. The first report row is
* therefore (number of rows with id==.)+1. It is computed from the data
* instead of being hard-coded (the original used 4519), and it is hoisted out
* of the loop because it does not change between simulations.
qui count if id==.
local first_report = r(N) + 1

qui forvalues s=1/100 { //simulations
	gen double ro=runiform() if id==.
	gen double rr=runiform() if id !=.
	sort ro rr //shuffled opinions first, then shuffled reports

	gen ofk_`s'=__fkgl if _n<=100
	gen rfk_`s'=.

	local jstart=`first_report'
	forvalues i=1/100 { //100 opinion-length report excerpts
		* target length: word count of the i-th shuffled opinion
		local ow=__acrw[`i']
		if missing(`ow') {
			di as err "simulation `s': row `i' has no opinion word count (__acrw)"
			exit 459
		}
		if `jstart' > _N {
			di as err "simulation `s', excerpt `i': no reports left to draw from"
			exit 459
		}

		* extend the excerpt report by report until it holds at least ow words
		local jend=`jstart'
		local rw=acrwords[`jend']
		while `rw' < `ow' {
			local jend=`jend'+1
			if `jend' > _N {
				di as err "simulation `s', excerpt `i': ran out of reports before reaching `ow' words"
				exit 459
			}
			local rw=`rw'+acrwords[`jend']
		}
		* a missing word count makes rw missing and ends the loop early
		if missing(`rw') {
			di as err "simulation `s', excerpt `i': missing acrwords in rows `jstart'-`jend'"
			exit 459
		}

		* weight = share of the excerpt's ow words contributed by each report;
		* the last report only contributes the words still needed to reach ow,
		* so the weights sum to 1
		gen wt=acrwords/`ow' in `jstart'/`jend'
		replace wt=(acrwords-`rw'+`ow')/`ow' in `jend'
		gen wtfk=wt*fkgl in `jstart'/`jend'

		* word-weighted fkgl of the excerpt
		summ wtfk in `jstart'/`jend'
		replace rfk_`s' = r(sum) in `i'

		drop wt wtfk
		local jstart=`jend'+1 //next excerpt starts at the following report
	}
	drop ro rr
}


* Per-simulation summary statistics: row s of each variable below holds the
* mean / variance of ofk_s (opinions) and rfk_s (report excerpts).
gen ofk_mean=.
gen rfk_mean=.
gen ofk_var=.
gen rfk_var=.

* r(mean) and r(Var) are referenced directly rather than through macro
* expansion (`r(mean)'). If summarize finds no observations these results are
* not set: the macro form then expands to nothing and "replace x= in s" is a
* syntax error, whereas the direct reference evaluates to missing. The direct
* form also avoids passing the value through a string.
qui forvalues s=1/100 {
	summ ofk_`s'
	replace ofk_mean=r(mean) in `s'
	replace ofk_var=r(Var) in `s'

	summ rfk_`s'
	replace rfk_mean=r(mean) in `s'
	replace rfk_var=r(Var) in `s'
}

* Opinion-minus-report differences of the per-simulation means and variances.
generate mean_diff = ofk_mean - rfk_mean
label variable mean_diff "ofk_mean-rfk_mean"

generate var_diff = ofk_var - rfk_var
label variable var_diff "ofk_var-rfk_var"

* Detailed distribution of each per-simulation statistic and of the differences.
foreach stat of varlist ofk_mean rfk_mean ofk_var rfk_var mean_diff var_diff {
	summarize `stat', detail
}



